{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "ac054481-ca33-4e7d-addf-bd8575658172",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:39:33.661836Z",
     "iopub.status.busy": "2022-05-24T07:39:33.661218Z",
     "iopub.status.idle": "2022-05-24T07:39:41.480004Z",
     "shell.execute_reply": "2022-05-24T07:39:41.479347Z",
     "shell.execute_reply.started": "2022-05-24T07:39:33.661768Z"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# 导入包\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "25b2179d-e0bd-4b22-98d6-67be38d826c4",
   "metadata": {},
   "source": [
    "## 1. 正确读取数据 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9c38555d-a8fc-4a7c-abb2-dac70b5512e6",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:39:48.797394Z",
     "iopub.status.busy": "2022-05-24T07:39:48.796970Z",
     "iopub.status.idle": "2022-05-24T07:39:48.829615Z",
     "shell.execute_reply": "2022-05-24T07:39:48.828399Z",
     "shell.execute_reply.started": "2022-05-24T07:39:48.797367Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>leixing</td>\n",
       "      <td>nianfen</td>\n",
       "      <td>licheng</td>\n",
       "      <td>didian</td>\n",
       "      <td>shoujia</td>\n",
       "      <td>yuanjia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2011 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                  0        1        2       3        4  \\\n",
       "0                           leixing  nianfen  licheng  didian  shoujia   \n",
       "1           凯迪拉克ATS-L 2016款 28T 时尚型    2016年   2.5万公里      长沙   16.77万   \n",
       "2              奥迪A6L 2014款 TFSI 标准型    2014年  13.8万公里      长沙   21.96万   \n",
       "3            本田 思域 2016款 1.8L 自动舒适版    2016年   4.8万公里      长沙    8.87万   \n",
       "4            大众 朗逸 2015款 1.6L 自动舒适版    2016年  10.5万公里      长沙    7.27万   \n",
       "...                             ...      ...      ...     ...      ...   \n",
       "2006     大众 途观 2013款 1.8TSI 自动两驱舒适版    2014年   7.3万公里      长沙   13.50万   \n",
       "2007   现代ix35 2012款 2.0L 自动两驱精英版GLS    2012年   7.1万公里      长沙    8.00万   \n",
       "2008           宝马3系 2014款 320Li 时尚型    2015年   4.6万公里      长沙   23.00万   \n",
       "2009  标致308 2014款 乐享版 经典 1.6L 手动优尚型    2015年   3.0万公里      长沙    6.20万   \n",
       "2010        大众POLO 2014款 1.6L 自动舒适版    2016年   2.9万公里      长沙    7.40万   \n",
       "\n",
       "            5  \n",
       "0     yuanjia  \n",
       "1      34.60万  \n",
       "2      44.50万  \n",
       "3      15.20万  \n",
       "4      14.90万  \n",
       "...       ...  \n",
       "2006   25.80万  \n",
       "2007   21.30万  \n",
       "2008   38.90万  \n",
       "2009   11.50万  \n",
       "2010   11.30万  \n",
       "\n",
       "[2011 rows x 6 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.read_csv(\"data/practice6.csv\", header=None)\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0ad9c61e-efa4-4f31-8f04-8a4aeebba2f1",
   "metadata": {},
   "source": [
    "## 2. 重命名列标签（改成中文或者英文，不要拼音）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5e067fc2-fe13-4697-b1fa-405bc8c03686",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:39:51.105241Z",
     "iopub.status.busy": "2022-05-24T07:39:51.104668Z",
     "iopub.status.idle": "2022-05-24T07:39:51.117388Z",
     "shell.execute_reply": "2022-05-24T07:39:51.116719Z",
     "shell.execute_reply.started": "2022-05-24T07:39:51.105215Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>leixing</td>\n",
       "      <td>nianfen</td>\n",
       "      <td>licheng</td>\n",
       "      <td>didian</td>\n",
       "      <td>shoujia</td>\n",
       "      <td>yuanjia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2011 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 类型       年份       里程      地点       售价  \\\n",
       "0                           leixing  nianfen  licheng  didian  shoujia   \n",
       "1           凯迪拉克ATS-L 2016款 28T 时尚型    2016年   2.5万公里      长沙   16.77万   \n",
       "2              奥迪A6L 2014款 TFSI 标准型    2014年  13.8万公里      长沙   21.96万   \n",
       "3            本田 思域 2016款 1.8L 自动舒适版    2016年   4.8万公里      长沙    8.87万   \n",
       "4            大众 朗逸 2015款 1.6L 自动舒适版    2016年  10.5万公里      长沙    7.27万   \n",
       "...                             ...      ...      ...     ...      ...   \n",
       "2006     大众 途观 2013款 1.8TSI 自动两驱舒适版    2014年   7.3万公里      长沙   13.50万   \n",
       "2007   现代ix35 2012款 2.0L 自动两驱精英版GLS    2012年   7.1万公里      长沙    8.00万   \n",
       "2008           宝马3系 2014款 320Li 时尚型    2015年   4.6万公里      长沙   23.00万   \n",
       "2009  标致308 2014款 乐享版 经典 1.6L 手动优尚型    2015年   3.0万公里      长沙    6.20万   \n",
       "2010        大众POLO 2014款 1.6L 自动舒适版    2016年   2.9万公里      长沙    7.40万   \n",
       "\n",
       "           原价  \n",
       "0     yuanjia  \n",
       "1      34.60万  \n",
       "2      44.50万  \n",
       "3      15.20万  \n",
       "4      14.90万  \n",
       "...       ...  \n",
       "2006   25.80万  \n",
       "2007   21.30万  \n",
       "2008   38.90万  \n",
       "2009   11.50万  \n",
       "2010   11.30万  \n",
       "\n",
       "[2011 rows x 6 columns]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.1 可以在读取数据之后，直接暴力执行columns\n",
    "data.columns = [\"类型\", \"年份\", \"里程\", \"地点\", \"售价\", \"原价\"]\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "ad5a92f8-3a2e-4ecf-99dc-5900ac485aca",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:39:54.177576Z",
     "iopub.status.busy": "2022-05-24T07:39:54.177233Z",
     "iopub.status.idle": "2022-05-24T07:39:54.194956Z",
     "shell.execute_reply": "2022-05-24T07:39:54.194310Z",
     "shell.execute_reply.started": "2022-05-24T07:39:54.177551Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>leixing</td>\n",
       "      <td>nianfen</td>\n",
       "      <td>licheng</td>\n",
       "      <td>didian</td>\n",
       "      <td>shoujia</td>\n",
       "      <td>yuanjia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2011 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 类型       年份       里程      地点       售价  \\\n",
       "0                           leixing  nianfen  licheng  didian  shoujia   \n",
       "1           凯迪拉克ATS-L 2016款 28T 时尚型    2016年   2.5万公里      长沙   16.77万   \n",
       "2              奥迪A6L 2014款 TFSI 标准型    2014年  13.8万公里      长沙   21.96万   \n",
       "3            本田 思域 2016款 1.8L 自动舒适版    2016年   4.8万公里      长沙    8.87万   \n",
       "4            大众 朗逸 2015款 1.6L 自动舒适版    2016年  10.5万公里      长沙    7.27万   \n",
       "...                             ...      ...      ...     ...      ...   \n",
       "2006     大众 途观 2013款 1.8TSI 自动两驱舒适版    2014年   7.3万公里      长沙   13.50万   \n",
       "2007   现代ix35 2012款 2.0L 自动两驱精英版GLS    2012年   7.1万公里      长沙    8.00万   \n",
       "2008           宝马3系 2014款 320Li 时尚型    2015年   4.6万公里      长沙   23.00万   \n",
       "2009  标致308 2014款 乐享版 经典 1.6L 手动优尚型    2015年   3.0万公里      长沙    6.20万   \n",
       "2010        大众POLO 2014款 1.6L 自动舒适版    2016年   2.9万公里      长沙    7.40万   \n",
       "\n",
       "           原价  \n",
       "0     yuanjia  \n",
       "1      34.60万  \n",
       "2      44.50万  \n",
       "3      15.20万  \n",
       "4      14.90万  \n",
       "...       ...  \n",
       "2006   25.80万  \n",
       "2007   21.30万  \n",
       "2008   38.90万  \n",
       "2009   11.50万  \n",
       "2010   11.30万  \n",
       "\n",
       "[2011 rows x 6 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 2.2 也可以在读取数据的时候指定names\n",
    "data = pd.read_csv(\"data/practice6.csv\", names=[\"类型\", \"年份\", \"里程\", \"地点\", \"售价\", \"原价\"])\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b3db48e2-6330-4866-81bd-78fb2e26b97e",
   "metadata": {},
   "source": [
    "## 3. 去除重复数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "d82af469-acf2-4a14-b7d2-6efe67f23a53",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:40:00.534143Z",
     "iopub.status.busy": "2022-05-24T07:40:00.533793Z",
     "iopub.status.idle": "2022-05-24T07:40:00.544445Z",
     "shell.execute_reply": "2022-05-24T07:40:00.543747Z",
     "shell.execute_reply.started": "2022-05-24T07:40:00.534117Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3.1 判断是否有重复值\n",
    "data.duplicated().is_unique\n",
    "# duplicated函数返回的布尔型Series对象值不唯一，所以存在重复值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "2384832e-b0f2-4750-bb4e-2d4b0f105bc8",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:40:01.543928Z",
     "iopub.status.busy": "2022-05-24T07:40:01.543604Z",
     "iopub.status.idle": "2022-05-24T07:40:01.557120Z",
     "shell.execute_reply": "2022-05-24T07:40:01.556371Z",
     "shell.execute_reply.started": "2022-05-24T07:40:01.543903Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>leixing</td>\n",
       "      <td>nianfen</td>\n",
       "      <td>licheng</td>\n",
       "      <td>didian</td>\n",
       "      <td>shoujia</td>\n",
       "      <td>yuanjia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2001 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 类型       年份       里程      地点       售价  \\\n",
       "0                           leixing  nianfen  licheng  didian  shoujia   \n",
       "1           凯迪拉克ATS-L 2016款 28T 时尚型    2016年   2.5万公里      长沙   16.77万   \n",
       "2              奥迪A6L 2014款 TFSI 标准型    2014年  13.8万公里      长沙   21.96万   \n",
       "3            本田 思域 2016款 1.8L 自动舒适版    2016年   4.8万公里      长沙    8.87万   \n",
       "4            大众 朗逸 2015款 1.6L 自动舒适版    2016年  10.5万公里      长沙    7.27万   \n",
       "...                             ...      ...      ...     ...      ...   \n",
       "2006     大众 途观 2013款 1.8TSI 自动两驱舒适版    2014年   7.3万公里      长沙   13.50万   \n",
       "2007   现代ix35 2012款 2.0L 自动两驱精英版GLS    2012年   7.1万公里      长沙    8.00万   \n",
       "2008           宝马3系 2014款 320Li 时尚型    2015年   4.6万公里      长沙   23.00万   \n",
       "2009  标致308 2014款 乐享版 经典 1.6L 手动优尚型    2015年   3.0万公里      长沙    6.20万   \n",
       "2010        大众POLO 2014款 1.6L 自动舒适版    2016年   2.9万公里      长沙    7.40万   \n",
       "\n",
       "           原价  \n",
       "0     yuanjia  \n",
       "1      34.60万  \n",
       "2      44.50万  \n",
       "3      15.20万  \n",
       "4      14.90万  \n",
       "...       ...  \n",
       "2006   25.80万  \n",
       "2007   21.30万  \n",
       "2008   38.90万  \n",
       "2009   11.50万  \n",
       "2010   11.30万  \n",
       "\n",
       "[2001 rows x 6 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 3.2 删除重复数据\n",
    "data = data.drop_duplicates()\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "71854ffe-72b0-47a2-ad48-d92e9d8deed5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-18T02:03:03.136610Z",
     "iopub.status.busy": "2022-05-18T02:03:03.136265Z",
     "iopub.status.idle": "2022-05-18T02:03:03.140990Z",
     "shell.execute_reply": "2022-05-18T02:03:03.139855Z",
     "shell.execute_reply.started": "2022-05-18T02:03:03.136583Z"
    }
   },
   "source": [
    "## 4. 处理缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "0f19b7e3-b522-4546-9d2f-ea71c104a884",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:40:04.532972Z",
     "iopub.status.busy": "2022-05-24T07:40:04.532405Z",
     "iopub.status.idle": "2022-05-24T07:40:04.544862Z",
     "shell.execute_reply": "2022-05-24T07:40:04.544264Z",
     "shell.execute_reply.started": "2022-05-24T07:40:04.532946Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "类型     年份     里程     地点     售价     原价   \n",
       "False  False  False  False  False  False    1928\n",
       "                                   True       73\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4.1 判断是否有缺失值\n",
    "data.isnull().value_counts()\n",
    "# isnull的放回结果中有73处缺失值，都在原价里"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "40c50c33-5fdc-439e-9c3b-3cf7fa5a907a",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:40:06.558770Z",
     "iopub.status.busy": "2022-05-24T07:40:06.558214Z",
     "iopub.status.idle": "2022-05-24T07:40:06.572541Z",
     "shell.execute_reply": "2022-05-24T07:40:06.571851Z",
     "shell.execute_reply.started": "2022-05-24T07:40:06.558744Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>leixing</td>\n",
       "      <td>nianfen</td>\n",
       "      <td>licheng</td>\n",
       "      <td>didian</td>\n",
       "      <td>shoujia</td>\n",
       "      <td>yuanjia</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1928 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 类型       年份       里程      地点       售价  \\\n",
       "0                           leixing  nianfen  licheng  didian  shoujia   \n",
       "1           凯迪拉克ATS-L 2016款 28T 时尚型    2016年   2.5万公里      长沙   16.77万   \n",
       "2              奥迪A6L 2014款 TFSI 标准型    2014年  13.8万公里      长沙   21.96万   \n",
       "3            本田 思域 2016款 1.8L 自动舒适版    2016年   4.8万公里      长沙    8.87万   \n",
       "4            大众 朗逸 2015款 1.6L 自动舒适版    2016年  10.5万公里      长沙    7.27万   \n",
       "...                             ...      ...      ...     ...      ...   \n",
       "2006     大众 途观 2013款 1.8TSI 自动两驱舒适版    2014年   7.3万公里      长沙   13.50万   \n",
       "2007   现代ix35 2012款 2.0L 自动两驱精英版GLS    2012年   7.1万公里      长沙    8.00万   \n",
       "2008           宝马3系 2014款 320Li 时尚型    2015年   4.6万公里      长沙   23.00万   \n",
       "2009  标致308 2014款 乐享版 经典 1.6L 手动优尚型    2015年   3.0万公里      长沙    6.20万   \n",
       "2010        大众POLO 2014款 1.6L 自动舒适版    2016年   2.9万公里      长沙    7.40万   \n",
       "\n",
       "           原价  \n",
       "0     yuanjia  \n",
       "1      34.60万  \n",
       "2      44.50万  \n",
       "3      15.20万  \n",
       "4      14.90万  \n",
       "...       ...  \n",
       "2006   25.80万  \n",
       "2007   21.30万  \n",
       "2008   38.90万  \n",
       "2009   11.50万  \n",
       "2010   11.30万  \n",
       "\n",
       "[1928 rows x 6 columns]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 4.2 Drop rows with missing values: any row holding at least one NaN is removed\n",
    "data = data.dropna(axis=0, how=\"any\")\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "dfad1684-e2c9-4efb-834b-02606ac21cf5",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T07:40:09.498022Z",
     "iopub.status.busy": "2022-05-24T07:40:09.497675Z",
     "iopub.status.idle": "2022-05-24T07:40:09.510856Z",
     "shell.execute_reply": "2022-05-24T07:40:09.510140Z",
     "shell.execute_reply.started": "2022-05-24T07:40:09.497996Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Smart smart fortwo 2012款 1.0 MHD 硬顶标准版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>5.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>4.89万</td>\n",
       "      <td>12.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1927 rows × 6 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          类型     年份       里程  地点      售价  \\\n",
       "1                    凯迪拉克ATS-L 2016款 28T 时尚型  2016年   2.5万公里  长沙  16.77万   \n",
       "2                       奥迪A6L 2014款 TFSI 标准型  2014年  13.8万公里  长沙  21.96万   \n",
       "3                     本田 思域 2016款 1.8L 自动舒适版  2016年   4.8万公里  长沙   8.87万   \n",
       "4                     大众 朗逸 2015款 1.6L 自动舒适版  2016年  10.5万公里  长沙   7.27万   \n",
       "16    Smart smart fortwo 2012款 1.0 MHD 硬顶标准版  2014年   5.6万公里  长沙   4.89万   \n",
       "...                                      ...    ...      ...  ..     ...   \n",
       "2006              大众 途观 2013款 1.8TSI 自动两驱舒适版  2014年   7.3万公里  长沙  13.50万   \n",
       "2007            现代ix35 2012款 2.0L 自动两驱精英版GLS  2012年   7.1万公里  长沙   8.00万   \n",
       "2008                    宝马3系 2014款 320Li 时尚型  2015年   4.6万公里  长沙  23.00万   \n",
       "2009           标致308 2014款 乐享版 经典 1.6L 手动优尚型  2015年   3.0万公里  长沙   6.20万   \n",
       "2010                 大众POLO 2014款 1.6L 自动舒适版  2016年   2.9万公里  长沙   7.40万   \n",
       "\n",
       "          原价  \n",
       "1     34.60万  \n",
       "2     44.50万  \n",
       "3     15.20万  \n",
       "4     14.90万  \n",
       "16    12.50万  \n",
       "...      ...  \n",
       "2006  25.80万  \n",
       "2007  21.30万  \n",
       "2008  38.90万  \n",
       "2009  11.50万  \n",
       "2010  11.30万  \n",
       "\n",
       "[1927 rows x 6 columns]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Remove the abnormal row whose 类型 value is the literal string \"leixing\".\n",
    "# .copy() turns the filtered result into an independent DataFrame, so that\n",
    "# adding columns to it later does not trigger SettingWithCopyWarning.\n",
    "data = data[data[\"类型\"] != \"leixing\"].copy()\n",
    "data"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "edfc8967-5d11-44d2-9e53-c189963584c6",
   "metadata": {},
   "source": [
    "## 5. 新增一列数据：汽车品牌"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "29c8b0ae-eb11-4810-8cf5-3e0ca95e677c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T08:09:09.315390Z",
     "iopub.status.busy": "2022-05-24T08:09:09.315064Z",
     "iopub.status.idle": "2022-05-24T08:09:09.477451Z",
     "shell.execute_reply": "2022-05-24T08:09:09.476788Z",
     "shell.execute_reply.started": "2022-05-24T08:09:09.315367Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/59/_lbpck214pqcv3v468n7dv6w0000gn/T/ipykernel_16006/2829213348.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  data[\"汽车品牌\"] = data[\"类型\"].apply(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "      <th>汽车品牌</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "      <td>凯迪拉克</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "      <td>奥迪</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "      <td>本田</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Smart smart fortwo 2012款 1.0 MHD 硬顶标准版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>5.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>4.89万</td>\n",
       "      <td>12.50万</td>\n",
       "      <td>Smart</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "      <td>现代</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "      <td>宝马</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "      <td>标致</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1927 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          类型     年份       里程  地点      售价  \\\n",
       "1                    凯迪拉克ATS-L 2016款 28T 时尚型  2016年   2.5万公里  长沙  16.77万   \n",
       "2                       奥迪A6L 2014款 TFSI 标准型  2014年  13.8万公里  长沙  21.96万   \n",
       "3                     本田 思域 2016款 1.8L 自动舒适版  2016年   4.8万公里  长沙   8.87万   \n",
       "4                     大众 朗逸 2015款 1.6L 自动舒适版  2016年  10.5万公里  长沙   7.27万   \n",
       "16    Smart smart fortwo 2012款 1.0 MHD 硬顶标准版  2014年   5.6万公里  长沙   4.89万   \n",
       "...                                      ...    ...      ...  ..     ...   \n",
       "2006              大众 途观 2013款 1.8TSI 自动两驱舒适版  2014年   7.3万公里  长沙  13.50万   \n",
       "2007            现代ix35 2012款 2.0L 自动两驱精英版GLS  2012年   7.1万公里  长沙   8.00万   \n",
       "2008                    宝马3系 2014款 320Li 时尚型  2015年   4.6万公里  长沙  23.00万   \n",
       "2009           标致308 2014款 乐享版 经典 1.6L 手动优尚型  2015年   3.0万公里  长沙   6.20万   \n",
       "2010                 大众POLO 2014款 1.6L 自动舒适版  2016年   2.9万公里  长沙   7.40万   \n",
       "\n",
       "          原价   汽车品牌  \n",
       "1     34.60万   凯迪拉克  \n",
       "2     44.50万     奥迪  \n",
       "3     15.20万     本田  \n",
       "4     14.90万     大众  \n",
       "16    12.50万  Smart  \n",
       "...      ...    ...  \n",
       "2006  25.80万     大众  \n",
       "2007  21.30万     现代  \n",
       "2008  38.90万     宝马  \n",
       "2009  11.50万     标致  \n",
       "2010  11.30万     大众  \n",
       "\n",
       "[1927 rows x 7 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "# Add a 汽车品牌 (car brand) column by applying a function to the 类型 column.\n",
    "# The brand sits at the very start of each 类型 string, so it is sliced off at\n",
    "# the position where a regular expression finds the end of the brand name:\n",
    "#   rule1 - the string does not start with an ASCII character: the brand ends\n",
    "#           at the first ASCII letter, whitespace or digit\n",
    "#   rule2 - the string starts with an ASCII character: the brand ends at the\n",
    "#           first whitespace or digit\n",
    "rule1 = r\"[a-zA-Z]|\\s+|\\d+\"\n",
    "rule2 = r\"\\s+|\\d+\"\n",
    "\n",
    "\n",
    "def extract_brand(car_type):\n",
    "    \"\"\"Return the leading brand name of a 类型 string.\n",
    "\n",
    "    An empty string yields \"\", and a string in which the rule finds no end\n",
    "    position is returned unchanged, so neither case raises an exception.\n",
    "    \"\"\"\n",
    "    if not car_type:\n",
    "        return \"\"\n",
    "    rule = rule2 if car_type[0].isascii() else rule1\n",
    "    brand_end = re.search(rule, car_type)\n",
    "    return car_type[: brand_end.start()] if brand_end else car_type\n",
    "\n",
    "\n",
    "# assign() builds a new DataFrame instead of writing a column into a filtered\n",
    "# frame, which is what raised SettingWithCopyWarning in this cell.\n",
    "data = data.assign(**{\"汽车品牌\": data[\"类型\"].apply(extract_brand)})\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "3277b404-d79c-4518-8130-a8cffe6e0e2c",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2022-05-24T08:11:31.140394Z",
     "iopub.status.busy": "2022-05-24T08:11:31.140007Z",
     "iopub.status.idle": "2022-05-24T08:11:31.169275Z",
     "shell.execute_reply": "2022-05-24T08:11:31.167638Z",
     "shell.execute_reply.started": "2022-05-24T08:11:31.140367Z"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/59/_lbpck214pqcv3v468n7dv6w0000gn/T/ipykernel_16006/2228066517.py:10: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  data[\"汽车品牌2\"] = data[\"类型\"].map(\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>类型</th>\n",
       "      <th>年份</th>\n",
       "      <th>里程</th>\n",
       "      <th>地点</th>\n",
       "      <th>售价</th>\n",
       "      <th>原价</th>\n",
       "      <th>汽车品牌</th>\n",
       "      <th>汽车品牌2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>凯迪拉克ATS-L 2016款 28T 时尚型</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>16.77万</td>\n",
       "      <td>34.60万</td>\n",
       "      <td>凯迪拉克</td>\n",
       "      <td>凯迪拉克</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>奥迪A6L 2014款 TFSI 标准型</td>\n",
       "      <td>2014年</td>\n",
       "      <td>13.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>21.96万</td>\n",
       "      <td>44.50万</td>\n",
       "      <td>奥迪</td>\n",
       "      <td>奥迪</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>本田 思域 2016款 1.8L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>4.8万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.87万</td>\n",
       "      <td>15.20万</td>\n",
       "      <td>本田</td>\n",
       "      <td>本田</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>大众 朗逸 2015款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>10.5万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.27万</td>\n",
       "      <td>14.90万</td>\n",
       "      <td>大众</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Smart smart fortwo 2012款 1.0 MHD 硬顶标准版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>5.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>4.89万</td>\n",
       "      <td>12.50万</td>\n",
       "      <td>Smart</td>\n",
       "      <td>Smart</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2006</th>\n",
       "      <td>大众 途观 2013款 1.8TSI 自动两驱舒适版</td>\n",
       "      <td>2014年</td>\n",
       "      <td>7.3万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>13.50万</td>\n",
       "      <td>25.80万</td>\n",
       "      <td>大众</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2007</th>\n",
       "      <td>现代ix35 2012款 2.0L 自动两驱精英版GLS</td>\n",
       "      <td>2012年</td>\n",
       "      <td>7.1万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>8.00万</td>\n",
       "      <td>21.30万</td>\n",
       "      <td>现代</td>\n",
       "      <td>现代</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2008</th>\n",
       "      <td>宝马3系 2014款 320Li 时尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>4.6万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>23.00万</td>\n",
       "      <td>38.90万</td>\n",
       "      <td>宝马</td>\n",
       "      <td>宝马</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2009</th>\n",
       "      <td>标致308 2014款 乐享版 经典 1.6L 手动优尚型</td>\n",
       "      <td>2015年</td>\n",
       "      <td>3.0万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>6.20万</td>\n",
       "      <td>11.50万</td>\n",
       "      <td>标致</td>\n",
       "      <td>标致</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2010</th>\n",
       "      <td>大众POLO 2014款 1.6L 自动舒适版</td>\n",
       "      <td>2016年</td>\n",
       "      <td>2.9万公里</td>\n",
       "      <td>长沙</td>\n",
       "      <td>7.40万</td>\n",
       "      <td>11.30万</td>\n",
       "      <td>大众</td>\n",
       "      <td>大众</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1927 rows × 8 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                          类型     年份       里程  地点      售价  \\\n",
       "1                    凯迪拉克ATS-L 2016款 28T 时尚型  2016年   2.5万公里  长沙  16.77万   \n",
       "2                       奥迪A6L 2014款 TFSI 标准型  2014年  13.8万公里  长沙  21.96万   \n",
       "3                     本田 思域 2016款 1.8L 自动舒适版  2016年   4.8万公里  长沙   8.87万   \n",
       "4                     大众 朗逸 2015款 1.6L 自动舒适版  2016年  10.5万公里  长沙   7.27万   \n",
       "16    Smart smart fortwo 2012款 1.0 MHD 硬顶标准版  2014年   5.6万公里  长沙   4.89万   \n",
       "...                                      ...    ...      ...  ..     ...   \n",
       "2006              大众 途观 2013款 1.8TSI 自动两驱舒适版  2014年   7.3万公里  长沙  13.50万   \n",
       "2007            现代ix35 2012款 2.0L 自动两驱精英版GLS  2012年   7.1万公里  长沙   8.00万   \n",
       "2008                    宝马3系 2014款 320Li 时尚型  2015年   4.6万公里  长沙  23.00万   \n",
       "2009           标致308 2014款 乐享版 经典 1.6L 手动优尚型  2015年   3.0万公里  长沙   6.20万   \n",
       "2010                 大众POLO 2014款 1.6L 自动舒适版  2016年   2.9万公里  长沙   7.40万   \n",
       "\n",
       "          原价   汽车品牌  汽车品牌2  \n",
       "1     34.60万   凯迪拉克   凯迪拉克  \n",
       "2     44.50万     奥迪     奥迪  \n",
       "3     15.20万     本田     本田  \n",
       "4     14.90万     大众     大众  \n",
       "16    12.50万  Smart  Smart  \n",
       "...      ...    ...    ...  \n",
       "2006  25.80万     大众     大众  \n",
       "2007  21.30万     现代     现代  \n",
       "2008  38.90万     宝马     宝马  \n",
       "2009  11.50万     标致     标致  \n",
       "2010  11.30万     大众     大众  \n",
       "\n",
       "[1927 rows x 8 columns]"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import re\n",
    "\n",
    "# Add a 汽车品牌2 column from the 类型 column, this time with Series.map,\n",
    "# which calls the function once for every element of the column.\n",
    "# The brand sits at the very start of each 类型 string, so it is sliced off at\n",
    "# the position where a regular expression finds the end of the brand name:\n",
    "#   rule1 - the string does not start with an ASCII character: the brand ends\n",
    "#           at the first ASCII letter, whitespace or digit\n",
    "#   rule2 - the string starts with an ASCII character: the brand ends at the\n",
    "#           first whitespace or digit\n",
    "rule1 = r\"[a-zA-Z]|\\s+|\\d+\"\n",
    "rule2 = r\"\\s+|\\d+\"\n",
    "\n",
    "\n",
    "def leading_brand(car_type):\n",
    "    \"\"\"Return the brand prefix of a 类型 string.\n",
    "\n",
    "    An empty string yields \"\", and a string in which the rule finds no end\n",
    "    position is returned unchanged, so neither case raises an exception.\n",
    "    \"\"\"\n",
    "    if not car_type:\n",
    "        return \"\"\n",
    "    rule = rule2 if car_type[0].isascii() else rule1\n",
    "    brand_end = re.search(rule, car_type)\n",
    "    return car_type[: brand_end.start()] if brand_end else car_type\n",
    "\n",
    "\n",
    "# assign() builds a new DataFrame instead of writing a column into a filtered\n",
    "# frame, which is what raised SettingWithCopyWarning in this cell.\n",
    "data = data.assign(**{\"汽车品牌2\": data[\"类型\"].map(leading_brand)})\n",
    "data"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "aca4429d0ae32578070a9184e1a5af661bf5db42b66a8ffcec4b3f108638c321"
  },
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
